02-06/12/2019
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ tibble 2.1.3 ✔ purrr 0.3.3 ## ✔ readr 1.3.1 ✔ stringr 1.4.0 ## ✔ tibble 2.1.3 ✔ forcats 0.4.0
## ── Conflicts ────────────────────────────────────────────── tidyverse_conflicts() ── ## ✖ magrittr::extract() masks tidyr::extract() ## ✖ dplyr::filter() masks plotly::filter(), stats::filter() ## ✖ kableExtra::group_rows() masks dplyr::group_rows() ## ✖ magrittr::inset() masks ggmap::inset() ## ✖ dplyr::lag() masks stats::lag() ## ✖ purrr::set_names() masks magrittr::set_names()
library(ggthemes)
# for the afternoon, we need to install rgdal.
# install.packages('leaflet')
# Load the police-shootings records from the local CSV file.
dat <- read.csv('data/fatal-police-shootings-data.csv')

# Tally the records per fleeing status, then discard the blank status.
new_dat <- dat %>%
  group_by(flee) %>%
  summarise(counts = n()) %>%
  filter(flee != '')

# Bar chart of the tallies; geom_col() draws the supplied values as bar heights.
ggplot(new_dat, aes(x = flee, y = counts)) +
  geom_col() +
  labs(x = '', y = 'Counts', title = 'People killed by police') +
  theme_clean()

# Column-by-column overview of the raw data.
summary(dat)
## id name date ## Min. : 3 TK TK : 104 2018-04-01: 10 ## 1st Qu.:1125 Michael Johnson : 3 2018-01-06: 9 ## Median :2168 Andrew Kana : 2 2018-02-01: 9 ## Mean :2176 Angel Uolla : 2 2018-06-29: 9 ## 3rd Qu.:3230 Brandon Jones : 2 2015-07-07: 8 ## Max. :4279 Christian Chavez: 2 2015-12-14: 8 ## (Other) :3767 (Other) :3829 ## manner_of_death armed age gender race ## shot :3672 gun :2149 Min. : 6.00 : 3 : 344 ## shot and Tasered: 210 knife : 568 1st Qu.:27.00 F: 174 A: 61 ## unarmed : 255 Median :35.00 M:3705 B: 918 ## : 249 Mean :36.85 H: 651 ## undetermined: 166 3rd Qu.:45.00 N: 62 ## toy weapon : 143 Max. :91.00 O: 36 ## (Other) : 352 NA's :138 W:1810 ## city state signs_of_mental_illness threat_level ## Los Angeles: 58 CA : 597 False:2970 attack :2445 ## Phoenix : 57 TX : 335 True : 912 other :1235 ## Houston : 39 FL : 236 undetermined: 202 ## Las Vegas : 36 AZ : 194 ## Columbus : 32 CO : 133 ## Chicago : 31 GA : 125 ## (Other) :3629 (Other):2262 ## flee body_camera ## : 136 False:3456 ## Car : 614 True : 426 ## Foot : 484 ## Not fleeing:2521 ## Other : 127 ## ##
# Count records for every combination of fleeing status and threat level,
# dropping rows whose fleeing status is blank. summarise() only peels off the
# last grouping key, so ungroup() is called to hand back a plain table.
new_dat <- dat %>%
  group_by(flee, threat_level) %>%
  summarise(counts = n()) %>%
  filter(flee != '') %>%
  ungroup()

# Dodged bars: one cluster per fleeing status, one bar per threat level.
ggplot(data = new_dat, aes(x = flee, y = counts, fill = threat_level)) +
  geom_bar(stat = 'identity', position = 'dodge') +
  labs(x = '', y = 'Counts', title = 'People killed by police') +
  theme_clean()
# Count records for every combination of race and threat level, dropping rows
# whose race is blank. ungroup() removes the grouping by `race` that
# summarise() leaves behind.
new_dat <- dat %>%
  group_by(race, threat_level) %>%
  summarise(counts = n()) %>%
  filter(race != '') %>%
  ungroup()

# Dodged bars: one cluster per race code, one bar per threat level.
ggplot(data = new_dat, aes(x = race, y = counts, fill = threat_level)) +
  geom_bar(stat = 'identity', position = 'dodge') +
  labs(x = '', y = 'Counts', title = 'People killed by police') +
  theme_clean()
# Mean age per race code; missing ages are ignored (na.rm = TRUE) and rows
# with a blank race are dropped.
new_dat <- dat %>%
  group_by(race) %>%
  summarise(mean_age = mean(age, na.rm = TRUE)) %>%
  filter(race != '')

# One bar per race code, bar height = mean age.
ggplot(data = new_dat, aes(x = race, y = mean_age)) +
  geom_bar(stat = 'identity', position = 'dodge') +
  labs(x = '', y = 'Avg age', title = 'People killed by police') +
  theme_clean()
# Mean age for every combination of race and threat level; missing ages are
# ignored (na.rm = TRUE) and rows with a blank race are dropped. ungroup()
# removes the grouping by `race` that summarise() leaves behind.
new_dat <- dat %>%
  group_by(race, threat_level) %>%
  summarise(mean_age = mean(age, na.rm = TRUE)) %>%
  filter(race != '') %>%
  ungroup()

# Dodged bars: one cluster per race code, one bar per threat level.
ggplot(data = new_dat, aes(x = race, y = mean_age, fill = threat_level)) +
  geom_bar(stat = 'identity', position = 'dodge') +
  labs(x = '', y = 'Avg age', title = 'People killed by police') +
  theme_clean()
# Keep only the 2007 rows of gapminder.
dat <- gapminder %>%
  filter(year == '2007')

# Scatter plot of life expectancy against GDP per capita, one point per row.
ggplot(dat, aes(x = gdpPercap, y = lifeExp)) +
  geom_point(size = 2, alpha = 0.4) +
  labs(
    x = 'GDP per capita',
    y = 'Life expectancy',
    title = 'GDP per capita and Life expectancy'
  ) +
  theme_clean()
# Bias number formatting towards fixed (non-scientific) notation.
options(scipen = '999')

# 2007 slice of gapminder.
dat <- gapminder %>%
  filter(year == '2007')

# Same scatter as before, with point size mapped to population.
ggplot(dat, aes(x = gdpPercap, y = lifeExp, size = pop)) +
  geom_point(alpha = 0.4) +
  labs(
    x = 'GDP per capita',
    y = 'Life expectancy',
    title = 'GDP per capita and Life expectancy'
  ) +
  theme_clean()
# Bias number formatting towards fixed (non-scientific) notation.
options(scipen = '999')

# 2007 slice of gapminder.
dat <- gapminder %>%
  filter(year == '2007')

# Scatter with point size mapped to population and colour to continent.
ggplot(dat, aes(x = gdpPercap, y = lifeExp, size = pop, color = continent)) +
  geom_point(alpha = 0.4) +
  labs(
    x = 'GDP per capita',
    y = 'Life expectancy',
    title = 'GDP per capita and Life expectancy'
  ) +
  theme_clean()
# Bias number formatting towards fixed (non-scientific) notation.
options(scipen = '999')

# Average GDP per capita, life expectancy and population for each
# year/continent pair.
dat <- gapminder %>%
  group_by(year, continent) %>%
  summarise(
    avg_gdp = mean(gdpPercap),
    avg_life_exp = mean(lifeExp),
    avg_pop = mean(pop)
  )
# Optional single-continent view:
# dat <- dat %>% filter(continent == 'Oceania')

# Average population over time: points joined by a line, one colour per
# continent.
ggplot(dat, aes(x = year, y = avg_pop, color = continent)) +
  geom_point() +
  geom_line() +
  labs(
    x = 'Year',
    y = 'Population',
    title = 'Year and population by continent'
  ) +
  theme_clean()
# Bias number formatting towards fixed (non-scientific) notation.
options(scipen = '999')

# Per year/continent averages of GDP per capita, life expectancy and
# population.
dat <- gapminder %>%
  group_by(year, continent) %>%
  summarise(
    avg_gdp = mean(gdpPercap),
    avg_life_exp = mean(lifeExp),
    avg_pop = mean(pop)
  )
# Optional single-continent view:
# dat <- dat %>% filter(continent == 'Oceania')

# Average population over time, coloured by continent; point size encodes the
# average GDP per capita.
ggplot(dat, aes(x = year, y = avg_pop, color = continent)) +
  geom_point(aes(size = avg_gdp)) +
  geom_line() +
  labs(
    x = 'Year',
    y = 'Population',
    title = 'Year and population by continent'
  ) +
  theme_clean()
# Bias number formatting towards fixed (non-scientific) notation.
options(scipen = '999')

# Per year/continent averages of GDP per capita, life expectancy and
# population.
dat <- gapminder %>%
  group_by(year, continent) %>%
  summarise(
    avg_gdp = mean(gdpPercap),
    avg_life_exp = mean(lifeExp),
    avg_pop = mean(pop)
  )
# Optional single-continent view:
# dat <- dat %>% filter(continent == 'Oceania')

# Average population over time, coloured by continent; point size encodes the
# average life expectancy.
ggplot(dat, aes(x = year, y = avg_pop, color = continent)) +
  geom_point(aes(size = avg_life_exp)) +
  geom_line() +
  labs(
    x = 'Year',
    y = 'Population',
    title = 'Year and population by continent'
  ) +
  theme_clean()
# Bias number formatting towards fixed (non-scientific) notation.
options(scipen = '999')

# Restrict gapminder to four Asian countries, then summarise GDP per capita,
# life expectancy and population for each year/country pair.
dat <- gapminder %>%
  group_by(year, country) %>%
  filter(
    continent == 'Asia' &
      country %in% c('China', 'Japan', 'Singapore', 'Korea, Rep.')
  ) %>%
  summarise(
    gdp = mean(gdpPercap),
    life_exp = mean(lifeExp),
    pop = mean(pop)
  )
# Leftover from the continent plots (`continent` is not a column of `dat` at
# this point):
# dat <- dat %>% filter(continent == 'Oceania')

# GDP per capita over time, one coloured line per country; point size encodes
# population.
ggplot(dat, aes(x = year, y = gdp, color = country)) +
  geom_point(aes(size = pop)) +
  geom_line() +
  labs(
    x = 'Year',
    y = 'GDP per capita',
    title = 'Year and GDP'
  ) +
  theme_clean()
(Practice exercises)
www.databrew.cc/exercises3
# Draw the world outline with base graphics. The call is namespace-qualified
# because purrr (attached by tidyverse) also exports a function called `map`,
# and which one a bare `map()` resolves to depends on package load order.
maps::map('world')
# Reference lines through the origin of the plot: horizontal at y = 0 in red,
# vertical at x = 0 in blue.
abline(h = 0, col = 'red')
abline(v = 0, col = 'blue')
Choropleth
Choropleth
Point
Point density
Point density
Point density
Administrative
Population cartogram
Elevation maps
# Attach leaflet for the interactive maps drawn from these data.
library(leaflet)
# Read the mosquito habitat table from the local .rds file.
mosquito <- rio::import("data/mosquito_habitat.rds") # Note: These are fake data
# Print the variable names
names(mosquito)
## [1] "habitat_id" "latitude" "longitude" "anophel_larvae"
# Show rows 4 to 6 as a sample of the records
mosquito[4:6,]
## habitat_id latitude longitude anophel_larvae ## 451 MHM-0004 -25.52311 32.80695 2 ## 331 MHM-0005 -25.52289 32.84260 0 ## 459 MHM-0006 -25.52044 32.81731 0
library(leaflet)

# Interactive map with one circle marker per row of `mosquito`, positioned by
# its longitude/latitude columns.
leaflet(mosquito, width = 1000, height = 600) %>%
  addTiles() %>% # Add default OpenStreetMap map tiles
  addCircleMarkers(lng = ~longitude, lat = ~latitude)
# Interactive map of the habitats: red markers where anophel_larvae > 0, green
# otherwise. Clicking a marker opens a popup with the habitat id and larvae
# count; the label shows the habitat id.
leaflet(mosquito, width = 1000, height = 600) %>%
  addTiles() %>%
  addCircleMarkers(
    lng = ~longitude,
    lat = ~latitude,
    color = ~ifelse(anophel_larvae > 0, 'red', 'green'),
    popup = ~paste0('Habitat: ', habitat_id,
                    ', Number of anophel larvae: ', anophel_larvae),
    label = ~habitat_id,
    radius = 7,
    stroke = FALSE, # spelled out: `F` is an ordinary variable and can be reassigned
    fillOpacity = 0.5
  )
# Load the built-in `quakes` dataset and keep a working copy.
data(quakes)
dat <- quakes

# Plot every row of `dat` as a marker; the magnitude is shown both in the
# popup and in the label.
leaflet(data = dat) %>%
  addTiles() %>%
  addMarkers(
    ~long, ~lat,
    popup = ~as.character(mag),
    label = ~as.character(mag)
  )